Code
library(tidyverse)
library(gapminder)
library(gt)
library(gtsummary)
library(ggrepel)
library(plotly)
library(sjPlot)Last update: 2024-04-30 11:50:30
library(tidyverse)
library(gapminder)
library(gt)
library(gtsummary)
library(ggrepel)
library(plotly)
library(sjPlot)This report uses the Gapminder dataset, which includes real-world country-level panel data (1952-2007) for:
Life expectancy (lifeExp)
Population size (pop)
GDP per capita (gdpPercap)
The dataset is available from the gapminder R package. For more information on Gapminder, see: https://www.gapminder.org/
gapminder %>%
glimpse()Rows: 1,704
Columns: 6
$ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
$ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
$ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
$ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
$ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
$ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
The following table summarizes the main variables across the years.
gapminder %>%
select(-country) %>%
tbl_summary(by = year)
| Characteristic | 1952, N = 142¹ | 1957, N = 142¹ | 1962, N = 142¹ | 1967, N = 142¹ | 1972, N = 142¹ | 1977, N = 142¹ | 1982, N = 142¹ | 1987, N = 142¹ | 1992, N = 142¹ | 1997, N = 142¹ | 2002, N = 142¹ | 2007, N = 142¹ |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| continent | ||||||||||||
| Africa | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) | 52 (37%) |
| Americas | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) | 25 (18%) |
| Asia | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) | 33 (23%) |
| Europe | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) | 30 (21%) |
| Oceania | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) | 2 (1.4%) |
| lifeExp | 45 (39, 60) | 48 (41, 63) | 51 (43, 65) | 54 (46, 67) | 57 (49, 69) | 60 (50, 70) | 62 (53, 71) | 66 (55, 72) | 68 (56, 73) | 69 (56, 74) | 71 (56, 75) | 72 (57, 76) |
| pop | 3,943,953 (1,452,026, 9,168,198) | 4,282,942 (1,568,811, 9,817,598) | 4,686,040 (1,784,362, 10,980,084) | 5,170,176 (2,034,768, 12,614,585) | 5,877,996 (2,351,192, 14,679,200) | 6,404,036 (2,759,717, 16,670,227) | 7,007,320 (3,006,286, 18,407,325) | 7,774,862 (3,194,990, 20,947,542) | 8,688,686 (3,605,992, 22,705,382) | 9,735,064 (3,770,150, 24,311,370) | 10,372,918 (4,173,506, 26,545,556) | 10,517,531 (4,508,034, 31,210,042) |
| gdpPercap | 1,969 (865, 3,913) | 2,173 (931, 4,876) | 2,335 (1,059, 5,709) | 2,678 (1,151, 7,076) | 3,339 (1,257, 9,509) | 3,799 (1,357, 11,204) | 4,216 (1,363, 12,348) | 4,280 (1,327, 11,994) | 4,386 (1,271, 10,684) | 4,782 (1,367, 12,023) | 5,320 (1,410, 13,360) | 6,124 (1,625, 18,009) |
| ¹ n (%); Median (IQR) | ||||||||||||
# Life expectancy by survey year: one box per year, with the yearly mean
# overlaid as a light-blue point.
gapminder %>%
  ggplot(aes(x = factor(year), y = lifeExp)) +
  geom_boxplot() +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = "mean", geom = "point", color = "lightblue", size = 3) +
  theme_classic() +
  labs(
    title = "Life expectancy increase over time",
    x = "",
    y = "Country life expectancy",
    caption = "Source:Gapminder"
  )

# Histogram of country life expectancy in 2007 only.
# No colour aesthetic is mapped here, so the former `guides(color = FALSE)`
# (deprecated form, and a no-op) has been dropped.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = lifeExp)) +
  geom_histogram(bins = 12, fill = "lightblue", color = "white") +
  theme_classic() +
  labs(
    title = "Life expectancy variation in 2007",
    x = "Country life expectancy",
    y = "Frequency",
    caption = "Source:Gapminder"
  )

# Same histogram, one panel per survey year.
gapminder %>%
  ggplot(aes(x = lifeExp)) +
  geom_histogram(bins = 12, fill = "lightblue", color = "white") +
  theme_classic() +
  facet_wrap(vars(year)) +
  labs(
    title = "Life expectancy variation over time",
    x = "Country life expectancy",
    y = "Frequency",
    caption = "Source:Gapminder"
  )

# Life expectancy by continent. The title promises 2007, so restrict the data
# to that year (the original pooled all years, unlike the matching GDP plot).
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  stat_summary(fun = "mean", geom = "point", color = "lightblue", size = 3) +
  theme_classic() +
  labs(
    title = "Life expectancy across continents in 2007",
    x = "",
    y = "Country life expectancy",
    caption = "Source:Gapminder"
  )

# One line per country; Israel is drawn thick and black, all others thin and
# light blue. The manual scale values follow the sorted levels of
# `country_israel`: "" first, then "Israel".
gapminder %>%
  mutate(country_israel = ifelse(country == "Israel", "Israel", "")) %>%
  ggplot(
    aes(
      x = year, y = lifeExp, group = country,
      color = country_israel, linewidth = country_israel
    )
  ) +
  geom_line(alpha = 0.5) +
  scale_colour_manual(name = "", values = c("lightblue", "black")) +
  # Line thickness is the `linewidth` aesthetic since ggplot2 3.4.0; mapping
  # `size` on lines is deprecated.
  scale_linewidth_manual(name = "", values = c(0.2, 2)) +
  theme_classic() +
  labs(
    title = "Life expectancy increase across countries",
    x = "",
    y = "Country life expectancy",
    caption = "Source:Gapminder"
  )

# Median life expectancy per continent and year; `.groups = "drop"` returns an
# ungrouped tibble and silences the regrouping message.
gapminder_lifeExp_tab <- gapminder %>%
  group_by(continent, year) %>%
  summarise(lifeExp = median(lifeExp), .groups = "drop")
# Continent-level median life expectancy over time. Each line is labelled at
# its 2007 end point, so the colour legend is hidden.
gapminder_lifeExp_tab %>%
  ggplot(aes(x = year, y = lifeExp, color = continent)) +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  geom_label_repel(
    data = gapminder_lifeExp_tab %>% filter(year == 2007),
    aes(label = continent),
    nudge_x = 1,
    nudge_y = 1
  ) +
  theme_classic() +
  # "none" replaces the deprecated `FALSE` for hiding a guide.
  guides(color = "none") +
  labs(
    title = "Life expectancy increase across continents",
    x = "",
    y = "Country median life expectancy",
    caption = "Source:Gapminder"
  )

# GDP per capita by survey year, with the yearly mean as a light-blue point.
gapminder %>%
  ggplot(aes(x = factor(year), y = gdpPercap)) +
  geom_boxplot() +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = "mean", geom = "point", color = "lightblue", size = 3) +
  theme_classic() +
  labs(
    title = "GDP per capita increase over time",
    x = "",
    y = "Country GDP per capita",
    caption = "Source:Gapminder"
  )
You can see that there’s an outlier
# Histogram of GDP per capita in 2007.
# No colour aesthetic is mapped, so the former `guides(color = FALSE)`
# (deprecated form, and a no-op) has been dropped.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap)) +
  geom_histogram(bins = 12, fill = "lightblue", color = "white") +
  theme_classic() +
  labs(
    title = "Variation in GDP per capita 2007",
    x = "Country GDP per capita",
    y = "Frequency",
    caption = "Source:Gapminder"
  )

# Same histogram, one panel per survey year.
gapminder %>%
  ggplot(aes(x = gdpPercap)) +
  geom_histogram(bins = 12, fill = "lightblue", color = "white") +
  theme_classic() +
  facet_wrap(vars(year)) +
  # xlim() sets the scale limits, so observations above 60000 are removed
  # before binning rather than merely hidden (ggplot2 warns about the
  # removed rows).
  xlim(0, 60000) +
  labs(
    title = "Variation in GDP per capita over time",
    x = "Country GDP per capita",
    y = "Frequency",
    caption = "Source:Gapminder"
  )

# GDP per capita by continent in 2007, with the continent mean as a point.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = "mean", geom = "point", color = "lightblue", size = 3) +
  theme_classic() +
  labs(
    title = "GDP per capita across continents in 2007",
    x = "",
    y = "Country GDP per capita",
    caption = "Source:Gapminder"
  )

# One line per country; Israel is drawn thick and black, all others thin and
# light blue. The manual scale values follow the sorted levels of
# `country_israel`: "" first, then "Israel".
gapminder %>%
  mutate(country_israel = ifelse(country == "Israel", "Israel", "")) %>%
  ggplot(
    aes(
      x = year, y = gdpPercap, group = country,
      color = country_israel, linewidth = country_israel
    )
  ) +
  geom_line(alpha = 0.7) +
  scale_colour_manual(name = "", values = c("lightblue", "black")) +
  # Line thickness is the `linewidth` aesthetic since ggplot2 3.4.0; mapping
  # `size` on lines is deprecated.
  scale_linewidth_manual(name = "", values = c(0.2, 2)) +
  theme_classic() +
  labs(
    title = "GDP per capita increase across countries",
    x = "",
    y = "Country GDP per capita",
    caption = "Source:Gapminder"
  )

# Median GDP per capita per continent and year; `.groups = "drop"` returns an
# ungrouped tibble and silences the regrouping message.
gapminder_gdpPercap_tab <- gapminder %>%
  group_by(continent, year) %>%
  summarise(gdpPercap = median(gdpPercap), .groups = "drop")
# Continent-level median GDP per capita over time. Each line is labelled at
# its 2007 end point, so the colour legend is hidden.
gapminder_gdpPercap_tab %>%
  ggplot(aes(x = year, y = gdpPercap, color = continent)) +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  geom_label_repel(
    data = gapminder_gdpPercap_tab %>% filter(year == 2007),
    aes(label = continent),
    nudge_x = 1,
    nudge_y = 1
  ) +
  theme_classic() +
  # "none" replaces the deprecated `FALSE` for hiding a guide.
  guides(color = "none") +
  labs(
    title = "GDP per capita increase across continents",
    x = "",
    y = "Country GDP per capita",
    caption = "Source:Gapminder"
  )

# NOTE: this deliberately overwrites `gapminder_gdpPercap_tab` (previously the
# continent-level table) with a country-level table for five selected
# countries; the plot that follows reads it under the same name.
# The table is grouped by country and year before taking the median;
# `.groups = "drop"` returns an ungrouped tibble.
gapminder_gdpPercap_tab <- gapminder %>%
  filter(country %in% c("Israel", "Lebanon", "Greece", "Turkey", "Syria")) %>%
  group_by(country, year) %>%
  summarise(gdpPercap = median(gdpPercap), .groups = "drop")
# GDP per capita over time, one line per country in `gapminder_gdpPercap_tab`.
# Each line is labelled at its 2007 end point, so the colour legend is hidden.
gapminder_gdpPercap_tab %>%
  ggplot(aes(x = year, y = gdpPercap, color = country)) +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  geom_label_repel(
    data = gapminder_gdpPercap_tab %>% filter(year == 2007),
    aes(label = country),
    nudge_x = 1,
    nudge_y = 1
  ) +
  theme_classic() +
  # "none" replaces the deprecated `FALSE` for hiding a guide.
  guides(color = "none") +
  labs(
    # The lines are countries, not continents; the title was copied from the
    # continent-level plot.
    title = "GDP per capita increase across selected countries",
    x = "",
    y = "Country GDP per capita",
    caption = "Source:Gapminder"
  )
2007
# 2007 scatter of life expectancy against GDP per capita: points coloured by
# continent and sized by population, with Israel labelled in black.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)) +
  geom_point() +
  geom_text(
    aes(label = country),
    data = gapminder %>% filter(year == 2007, country == "Israel"),
    size = 4, hjust = -0.1, vjust = -0.2, color = "black"
  ) +
  theme_classic() +
  # "none" replaces the deprecated `FALSE` for hiding a guide.
  guides(color = "none", size = "none") +
  labs(
    title = "Link between GDP per capita and life expectancy across continents",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Source:Gapminder"
  )

# Same scatter stored as `p1` for conversion with ggplotly(). The extra `text`
# aesthetic carries the country name for the interactive version.
p1 <- gapminder %>%
  filter(year == 2007) %>%
  ggplot(
    aes(
      x = gdpPercap, y = lifeExp, color = continent, size = pop,
      text = paste("country:", country)
    )
  ) +
  geom_point() +
  geom_text(
    aes(label = country),
    data = gapminder %>% filter(year == 2007, country == "Israel"),
    size = 4, hjust = -0.1, vjust = -0.2, color = "black"
  ) +
  theme_classic() +
  guides(color = "none", size = "none") +
  labs(
    title = "Link between GDP per capita and life expectancy across continents",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Source:Gapminder"
  )
# Interactive version of the 2007 scatter.
ggplotly(p1)

# 2007 scatter with a single smoother over all countries: `group = 1`
# overrides the per-continent grouping implied by the colour mapping.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)) +
  geom_point() +
  geom_smooth(aes(group = 1)) +
  geom_text(
    aes(label = country),
    data = gapminder %>% filter(year == 2007, country == "Israel"),
    size = 4, hjust = -0.1, vjust = -0.2, color = "black"
  ) +
  theme_classic() +
  # "none" replaces the deprecated `FALSE` for hiding a guide.
  guides(color = "none", size = "none") +
  labs(
    title = "Link between GDP per capita and life expectancy across continents",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Source:Gapminder"
  )

# Same scatter on log(GDP per capita) with linear fits.
# NOTE(review): there is no `group = 1` here, so the inherited colour mapping
# gives one lm line per continent -- confirm that is intended.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = log(gdpPercap), y = lifeExp, color = continent, size = pop)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(
    aes(label = country),
    data = gapminder %>% filter(year == 2007, country == "Israel"),
    size = 4, hjust = -0.1, vjust = -0.2, color = "black"
  ) +
  theme_classic() +
  guides(color = "none", size = "none") +
  labs(
    title = "Link between GDP per capita and life expectancy across continents",
    x = "GDP per capita (log)",
    y = "Life expectancy",
    caption = "Source:Gapminder"
  )

# Baseline linear model on the full panel: life expectancy on log GDP per
# capita, year and continent (additive terms only).
tmod1 <- lm(lifeExp ~ log(gdpPercap) + year + continent, data = gapminder)
tab_model(tmod1)| life Exp | |||
| Predictors | Estimates | CI | p |
| (Intercept) | -465.87 | -498.57 – -433.17 | <0.001 |
| gdpPercap [log] | 5.02 | 4.71 – 5.34 | <0.001 |
| year | 0.24 | 0.22 – 0.26 | <0.001 |
| continent [Americas] | 8.93 | 8.02 – 9.83 | <0.001 |
| continent [Asia] | 7.06 | 6.29 – 7.84 | <0.001 |
| continent [Europe] | 12.51 | 11.51 – 13.51 | <0.001 |
| continent [Oceania] | 12.75 | 10.25 – 15.25 | <0.001 |
| Observations | 1704 | ||
| R2 / R2 adjusted | 0.798 / 0.797 | ||
plot_model(tmod1)plot_model(tmod1,type = "std")plot_model(tmod1,type = "pred")$gdpPercap
$year
$continent
# Extend the baseline model with continent interactions on both
# log(gdpPercap) and year; all existing terms are kept via `. ~ .`.
tmod2 <- update(tmod1, . ~ . + continent * (log(gdpPercap) + year))
tab_model(tmod1,tmod2)| life Exp | life Exp | |||||
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | -465.87 | -498.57 – -433.17 | <0.001 | -478.42 | -529.18 – -427.66 | <0.001 |
| gdpPercap [log] | 5.02 | 4.71 – 5.34 | <0.001 | 4.85 | 4.33 – 5.37 | <0.001 |
| year | 0.24 | 0.22 – 0.26 | <0.001 | 0.25 | 0.22 – 0.27 | <0.001 |
| continent [Americas] | 8.93 | 8.02 – 9.83 | <0.001 | -40.71 | -131.67 – 50.25 | 0.380 |
| continent [Asia] | 7.06 | 6.29 – 7.84 | <0.001 | -163.80 | -247.02 – -80.59 | <0.001 |
| continent [Europe] | 12.51 | 11.51 – 13.51 | <0.001 | 306.46 | 214.37 – 398.55 | <0.001 |
| continent [Oceania] | 12.75 | 10.25 – 15.25 | <0.001 | 221.30 | -465.04 – 907.63 | 0.527 |
| gdpPercap [log] × continent [Americas] | | | | 2.46 | 1.33 – 3.59 | <0.001 |
| gdpPercap [log] × continent [Asia] | | | | -0.02 | -0.71 – 0.66 | 0.948 |
| gdpPercap [log] × continent [Europe] | | | | 0.09 | -1.01 – 1.20 | 0.867 |
| gdpPercap [log] × continent [Oceania] | | | | -1.61 | -25.81 – 22.58 | 0.896 |
| year × continent [Americas] | | | | 0.01 | -0.03 – 0.06 | 0.547 |
| year × continent [Asia] | | | | 0.09 | 0.04 – 0.13 | <0.001 |
| year × continent [Europe] | | | | -0.15 | -0.20 – -0.10 | <0.001 |
| year × continent [Oceania] | | | | -0.10 | -0.56 – 0.36 | 0.678 |
| Observations | 1704 | 1704 | ||||
| R2 / R2 adjusted | 0.798 / 0.797 | 0.813 / 0.811 | ||||
plot_model(tmod2,type = "pred", terms = c("year","continent"))anova(tmod1,tmod2)Analysis of Variance Table
Model 1: lifeExp ~ log(gdpPercap) + year + continent
Model 2: lifeExp ~ log(gdpPercap) + year + continent + log(gdpPercap):continent +
year:continent
Res.Df RSS Df Sum of Sq F Pr(>F)
1 1697 57348
2 1689 53159 8 4189.6 16.639 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1